#!/usr/bin/env python
# encoding=utf-8
import re
from xtls.codehelper import timeit
try:
    from cStringIO import StringIO
except ImportError:
    from StringIO import StringIO

from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.pdfpage import PDFPage


@timeit
def convert(fp):
    """Extract the text of a PDF and return it as a unicode string.

    Every page yielded by ``PDFPage.get_pages`` is run through a pdfminer
    ``TextConverter`` that writes into an in-memory buffer.  The buffer
    content is decoded as UTF-8 and each run of consecutive newlines is
    collapsed into a single newline.

    :param fp: binary file-like object positioned at the start of the PDF.
    :return: the extracted text (unicode).
    :raises: whatever pdfminer raises while parsing or rendering; the
        converter device and the buffer are closed in that case as well.
    """
    rsrcmgr = PDFResourceManager()
    retstr = StringIO()
    try:
        device = TextConverter(rsrcmgr, retstr, laparams=LAParams())
        try:
            interpreter = PDFPageInterpreter(rsrcmgr, device)
            # An empty set of page numbers is passed as the second argument;
            # presumably pdfminer treats that as "no filter" -- TODO confirm.
            for page in PDFPage.get_pages(fp, set()):
                interpreter.process_page(page)
            # Read the buffer before closing anything, as the original did.
            text = retstr.getvalue()
        finally:
            device.close()
    finally:
        retstr.close()

    # u'\n+' is the same pattern as the former ur'[\n]+': one or more
    # newline characters.
    return re.sub(u'\n+', u'\n', text.decode('utf-8'))


if __name__ == '__main__':
    # Read the whole PDF into memory first so the file handle is released
    # before the conversion starts.
    with open('/home/xlzd/abc.pdf', 'rb') as pdf_file:
        pdf_bytes = pdf_file.read()
    in_memory_pdf = StringIO(pdf_bytes)
    # Report the length of the extracted text.
    print(len(convert(in_memory_pdf)))
